The following is an EDA used for findings pertinent to the Fall 2022 project symposium. @author: Atharva Shirke
Import packages (pathlib, pandas, numpy, sklearn, matplotlib, geopandas)
import pyproj
import shapely
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from pathlib import Path
import folium
import mapclassify
import geopandas
import rtree
espg4326 = pyproj.CRS('+proj=longlat +datum=WGS84 +no_defs +type=crs')
dataPath = Path("../data/")
def makeDataFrame(relative_path, geojson=False):
"""
Given the relative path of a csv in the data
directory, return a pandas dataframe.
"""
targetPath = dataPath / Path(relative_path)
if geojson:
return geopandas.read_file(targetPath)
return pd.read_csv(targetPath)
Using this function, let's start by loading up the zoning information we have for the city of Berkeley.
zones = "raw/zoning.geojson"
zones = makeDataFrame(zones, geojson=True)
zones
| shape_st_2 | area | shape_stle | blocks_id | perimeter | lasteditor | height | lastupdate | objectid | genplan | zoneclass | new_block | shape_st_1 | shape_star | zonedesc | baseelev | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 897.965039154 | 256240.562 | 897.96503916 | 3 | 2942.173 | None | 0 | None | 1 | LDR | R-1H | 062 2949 | 23871.2937675 | 23871.2939453 | None | 0 | R | MULTIPOLYGON (((-122.27356 37.90460, -122.2735... |
| 1 | 452.362820501 | 111175 | 452.3628205 | 5 | 1484.545 | None | 0 | None | 2 | LDR | R-1H | 063 2951 | 10322.7637992 | 10322.763916 | None | 0 | R | MULTIPOLYGON (((-122.26997 37.90433, -122.2699... |
| 2 | 807.498810905 | 170184.812 | 807.49881091 | 6 | 2637.073 | None | 0 | None | 3 | LDR | R-1H | 062 2942 | 16129.8272618 | 16129.8276367 | None | 0 | R | MULTIPOLYGON (((-122.27484 37.90388, -122.2745... |
| 3 | 1011.43398248 | 307217.938 | 1011.43398248 | 7 | 3510.71 | None | 0 | None | 4 | LDR | R-1H | 063 3100 | 28537.9496481 | 28537.951416 | None | 0 | R | MULTIPOLYGON (((-122.26898 37.90397, -122.2684... |
| 4 | 433.015174892 | 101674.438 | 433.01517489 | 8 | 1421.052 | None | 0 | None | 5 | LDR | R-1H | 062 2945 | 9440.55918856 | 9440.55957031 | None | 0 | R | MULTIPOLYGON (((-122.27198 37.90388, -122.2719... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1855 | 338.715577619 | 157345.375 | 338.71557762 | 436 | 1725.794 | None | 0 | None | 1856 | M | MULI | 060 2355 | 7153.08030928 | 7153.08007813 | None | 0 | MULI | MULTIPOLYGON (((-122.29736 37.88114, -122.2974... |
| 1856 | 172.74327643 | 57831.625 | 172.74327643 | 1113 | 1126.78 | None | 0 | None | 1857 | M | MULI | 054 1763 | 1228.491685 | 1228.49169922 | None | 0 | MULI | MULTIPOLYGON (((-122.28949 37.85835, -122.2893... |
| 1857 | 157.757642818 | 63278.188 | 381.15307606 | 1532 | 1248.658 | None | 0 | None | 1858 | NC | C-SA | 052 1527 | 335.41452172 | 6088.6953125 | None | 0 | C | MULTIPOLYGON (((-122.26951 37.84853, -122.2695... |
| 1858 | 105.835958641 | 77694.688 | 445.48575821 | 1556 | 1456.261 | None | 0 | None | 1859 | NC | C-AC | 052 1435 | 480.870974915 | 7457.67993164 | None | 0 | C | MULTIPOLYGON (((-122.27333 37.84618, -122.2733... |
| 1859 | 129.49176163 | 28538.438 | 218.92572458 | 1525 | 718.455 | None | 0 | None | 1860 | MDR | R-2A | 052 1533 | 996.79170825 | 2649.86743164 | None | 0 | R | MULTIPOLYGON (((-122.27278 37.84918, -122.2732... |
1860 rows × 18 columns
zones.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 1860 entries, 0 to 1859 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 shape_st_2 1860 non-null object 1 area 1860 non-null object 2 shape_stle 1860 non-null object 3 blocks_id 1860 non-null object 4 perimeter 1860 non-null object 5 lasteditor 0 non-null object 6 height 1860 non-null object 7 lastupdate 0 non-null object 8 objectid 1860 non-null object 9 genplan 1834 non-null object 10 zoneclass 1860 non-null object 11 new_block 1802 non-null object 12 shape_st_1 1860 non-null object 13 shape_star 1860 non-null object 14 zonedesc 0 non-null object 15 baseelev 1860 non-null object 16 zone 1816 non-null object 17 geometry 1860 non-null geometry dtypes: geometry(1), object(17) memory usage: 261.7+ KB
Let's drop all null columns before we start analyzing the data.
zones = zones.drop(columns=["zonedesc", "lastupdate", "lasteditor"])
Based on the specifications of GIS data, we know what shape_*, geometry, and area columns do. These are components used for defining and drawing the polygons we use for defining zones. Short descriptions below:
Because we know all the information is encoded in geometry, let's drop the cols with redundant information and put it in a new dataframe.
blocks = zones.drop(columns=["shape_star", "shape_stle", "shape_st_1", "shape_st_2"])
blocks
| area | blocks_id | perimeter | height | objectid | genplan | zoneclass | new_block | baseelev | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 256240.562 | 3 | 2942.173 | 0 | 1 | LDR | R-1H | 062 2949 | 0 | R | MULTIPOLYGON (((-122.27356 37.90460, -122.2735... |
| 1 | 111175 | 5 | 1484.545 | 0 | 2 | LDR | R-1H | 063 2951 | 0 | R | MULTIPOLYGON (((-122.26997 37.90433, -122.2699... |
| 2 | 170184.812 | 6 | 2637.073 | 0 | 3 | LDR | R-1H | 062 2942 | 0 | R | MULTIPOLYGON (((-122.27484 37.90388, -122.2745... |
| 3 | 307217.938 | 7 | 3510.71 | 0 | 4 | LDR | R-1H | 063 3100 | 0 | R | MULTIPOLYGON (((-122.26898 37.90397, -122.2684... |
| 4 | 101674.438 | 8 | 1421.052 | 0 | 5 | LDR | R-1H | 062 2945 | 0 | R | MULTIPOLYGON (((-122.27198 37.90388, -122.2719... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1855 | 157345.375 | 436 | 1725.794 | 0 | 1856 | M | MULI | 060 2355 | 0 | MULI | MULTIPOLYGON (((-122.29736 37.88114, -122.2974... |
| 1856 | 57831.625 | 1113 | 1126.78 | 0 | 1857 | M | MULI | 054 1763 | 0 | MULI | MULTIPOLYGON (((-122.28949 37.85835, -122.2893... |
| 1857 | 63278.188 | 1532 | 1248.658 | 0 | 1858 | NC | C-SA | 052 1527 | 0 | C | MULTIPOLYGON (((-122.26951 37.84853, -122.2695... |
| 1858 | 77694.688 | 1556 | 1456.261 | 0 | 1859 | NC | C-AC | 052 1435 | 0 | C | MULTIPOLYGON (((-122.27333 37.84618, -122.2733... |
| 1859 | 28538.438 | 1525 | 718.455 | 0 | 1860 | MDR | R-2A | 052 1533 | 0 | R | MULTIPOLYGON (((-122.27278 37.84918, -122.2732... |
1860 rows × 11 columns
Load in the visualization.
blocks.explore()
This visualization has brought us some interesting insights:
Let's drop all the z-axis information columns and recompute areas and perimeters.
blocks = blocks.drop(columns=["baseelev", "height"])
blocks
| area | blocks_id | perimeter | objectid | genplan | zoneclass | new_block | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 256240.562 | 3 | 2942.173 | 1 | LDR | R-1H | 062 2949 | R | MULTIPOLYGON (((-122.27356 37.90460, -122.2735... |
| 1 | 111175 | 5 | 1484.545 | 2 | LDR | R-1H | 063 2951 | R | MULTIPOLYGON (((-122.26997 37.90433, -122.2699... |
| 2 | 170184.812 | 6 | 2637.073 | 3 | LDR | R-1H | 062 2942 | R | MULTIPOLYGON (((-122.27484 37.90388, -122.2745... |
| 3 | 307217.938 | 7 | 3510.71 | 4 | LDR | R-1H | 063 3100 | R | MULTIPOLYGON (((-122.26898 37.90397, -122.2684... |
| 4 | 101674.438 | 8 | 1421.052 | 5 | LDR | R-1H | 062 2945 | R | MULTIPOLYGON (((-122.27198 37.90388, -122.2719... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1855 | 157345.375 | 436 | 1725.794 | 1856 | M | MULI | 060 2355 | MULI | MULTIPOLYGON (((-122.29736 37.88114, -122.2974... |
| 1856 | 57831.625 | 1113 | 1126.78 | 1857 | M | MULI | 054 1763 | MULI | MULTIPOLYGON (((-122.28949 37.85835, -122.2893... |
| 1857 | 63278.188 | 1532 | 1248.658 | 1858 | NC | C-SA | 052 1527 | C | MULTIPOLYGON (((-122.26951 37.84853, -122.2695... |
| 1858 | 77694.688 | 1556 | 1456.261 | 1859 | NC | C-AC | 052 1435 | C | MULTIPOLYGON (((-122.27333 37.84618, -122.2733... |
| 1859 | 28538.438 | 1525 | 718.455 | 1860 | MDR | R-2A | 052 1533 | R | MULTIPOLYGON (((-122.27278 37.84918, -122.2732... |
1860 rows × 9 columns
blocks.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 1860 entries, 0 to 1859 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 area 1860 non-null object 1 blocks_id 1860 non-null object 2 perimeter 1860 non-null object 3 objectid 1860 non-null object 4 genplan 1834 non-null object 5 zoneclass 1860 non-null object 6 new_block 1802 non-null object 7 zone 1816 non-null object 8 geometry 1860 non-null geometry dtypes: geometry(1), object(8) memory usage: 130.9+ KB
With 1860 entries, we can see that some features (genplan, new_block, zone) are not fully populated. This can cause problems for us as we collate data from other sources, so it's really important we clean these issues now and impute any necessary information.
blocks["genplan"].unique()
array(['LDR', 'BC', 'MDR', 'NC', 'X', 'HDR', 'M', 'MU', 'I', 'OS', 'W',
'DT', 'RMU', None], dtype=object)
len(blocks[blocks["genplan"].isna()])
26
Looks like we have 26 null values for this feature. The genplan identifiers correspond to codes outlined in Berkeley's Land Use Element of the published General Plan linked here.
A synopsis of land use classifications:
residential, community services, schools, home occupations, recreational uses, and open spaces. Building intensity: 1-10 dwelling units per net acres, not including secondary units, population density: <=22 persons per acre.
medium sized multi-family structures. Building intensity: 20-40 dwelling units per net acre, population density: 44-88 persons per acre.
near transit, the Downtown, the University campus, or BART. Building intensity: 40-100 dwelling units per net acre, population density: 88-220 persons per net acre.
structures, in close proximity to transit and major shopping and employment centers. Building intensity: 40-100 dwelling units per net acre, population density: 88-220 persons per net acre.
commercial development, and multi-family residential structures. Population density: 44-88 persons per acre.
family residential structures. Building intensity will generally range from a Floor Area Ratio (FAR) of less than 1 to an FAR of 5. Population density: 44-88 persons per acre
density: 22-44 persons per acre, where housing is allowed.
Land use classifications also have corresponding compatible zoning district types:
blocks[blocks["genplan"].isna()]
| area | blocks_id | perimeter | objectid | genplan | zoneclass | new_block | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|
| 1682 | 0 | 0 | 0 | 1847 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26730 37.86573, -122.2669... |
| 1722 | 65.443 | 0 | 281.056 | 1719 | None | R-2 | None | None | MULTIPOLYGON (((-122.28377 37.86140, -122.2837... |
| 1724 | 146.194 | 0 | 250.698 | 1721 | None | R-2 | None | None | MULTIPOLYGON (((-122.28400 37.86174, -122.2839... |
| 1725 | 2.341 | 0 | 31.723 | 1722 | None | R-2 | None | None | MULTIPOLYGON (((-122.28377 37.86140, -122.2837... |
| 1781 | 0 | 0 | 0 | 1848 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26692 37.86578, -122.2667... |
| 1810 | 0 | 0 | 0 | 1849 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26740 37.86661, -122.2670... |
| 1811 | 0 | 0 | 0 | 1850 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26701 37.86666, -122.2666... |
| 1812 | 0 | 0 | 0 | 1851 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26593 37.86702, -122.2662... |
| 1834 | 0 | 0 | 0 | 1830 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26772 37.87387, -122.2672... |
| 1835 | 0 | 0 | 0 | 1831 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26840 37.87378, -122.2677... |
| 1836 | 0 | 0 | 0 | 1832 | None | C-DMU Oute | None | None | MULTIPOLYGON (((-122.26837 37.87349, -122.2671... |
| 1837 | 0 | 0 | 0 | 1833 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26949 37.87293, -122.2695... |
| 1838 | 0 | 0 | 0 | 1834 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26873 37.87378, -122.2686... |
| 1839 | 0 | 0 | 0 | 1835 | None | R-2A | None | None | MULTIPOLYGON (((-122.26964 37.87367, -122.2690... |
| 1840 | 0 | 0 | 0 | 1836 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26884 37.86791, -122.2689... |
| 1841 | 0 | 0 | 0 | 1837 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26892 37.86790, -122.2688... |
| 1842 | 0 | 0 | 0 | 1838 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26864 37.86740, -122.2680... |
| 1843 | 0 | 0 | 0 | 1839 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26840 37.86706, -122.2683... |
| 1844 | 0 | 0 | 0 | 1840 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26793 37.86654, -122.2678... |
| 1845 | 0 | 0 | 0 | 1841 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26835 37.86649, -122.2682... |
| 1846 | 0 | 0 | 0 | 1842 | None | R-3 | None | None | MULTIPOLYGON (((-122.26915 37.86637, -122.2691... |
| 1847 | 0 | 0 | 0 | 1843 | None | R-3 | None | None | MULTIPOLYGON (((-122.26988 37.86539, -122.2686... |
| 1848 | 0 | 0 | 0 | 1844 | None | C-DMU Buff | None | None | MULTIPOLYGON (((-122.26868 37.86555, -122.2684... |
| 1849 | 0 | 0 | 0 | 1845 | None | C-DMU Corr | None | None | MULTIPOLYGON (((-122.26825 37.86561, -122.2678... |
| 1850 | 0 | 0 | 0 | 1846 | None | C-SA | None | None | MULTIPOLYGON (((-122.26572 37.86426, -122.2659... |
| 1851 | 0 | 0 | 0 | 1852 | None | C-DMU Oute | None | None | MULTIPOLYGON (((-122.26742 37.86683, -122.2675... |
Most of the areas with no gen plans are designated "C-DMU". These are Commercial Downtown Mixed Use areas. Looking at the visualization, it seems these are institutional buildings with commercial centers (ex: Berkeley Way West). For our use case, it's appropriate to label these as downtown (DT), since the genplan issue seems to stem from the lack of a name for a insitution/commercial hybrid structure.
indexes = blocks.loc[blocks["genplan"].isna() & blocks["zoneclass"].str.startswith("C-DMU")].index
blocks.loc[indexes, "genplan"] = "DT"
blocks[blocks["genplan"].isna()]
| area | blocks_id | perimeter | objectid | genplan | zoneclass | new_block | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|
| 1722 | 65.443 | 0 | 281.056 | 1719 | None | R-2 | None | None | MULTIPOLYGON (((-122.28377 37.86140, -122.2837... |
| 1724 | 146.194 | 0 | 250.698 | 1721 | None | R-2 | None | None | MULTIPOLYGON (((-122.28400 37.86174, -122.2839... |
| 1725 | 2.341 | 0 | 31.723 | 1722 | None | R-2 | None | None | MULTIPOLYGON (((-122.28377 37.86140, -122.2837... |
| 1839 | 0 | 0 | 0 | 1835 | None | R-2A | None | None | MULTIPOLYGON (((-122.26964 37.87367, -122.2690... |
| 1846 | 0 | 0 | 0 | 1842 | None | R-3 | None | None | MULTIPOLYGON (((-122.26915 37.86637, -122.2691... |
| 1847 | 0 | 0 | 0 | 1843 | None | R-3 | None | None | MULTIPOLYGON (((-122.26988 37.86539, -122.2686... |
| 1850 | 0 | 0 | 0 | 1846 | None | C-SA | None | None | MULTIPOLYGON (((-122.26572 37.86426, -122.2659... |
These are our last few blocks without genplans. Let's put them into a df and explore to figure out what's up.
outliers = blocks[blocks["genplan"].isna()]
outliers.explore()
One of these objects (id: 1725) is a line with no data (potentially a bad input). We'll remove that one from our dataset. The others seem to be directly adjacent to special non-residential structures (open spaces, stores, fire and police stations). In all cases they are residential areas, and therefore should be given a residential zone code. We'll assign Residential Mixed Use (RMU) gen codes to these spaces.
blocks.drop(index=1725)
outliers.drop(index=1725)
blocks.loc[outliers.index, "genplan"] = "RMU"
blocks.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 1860 entries, 0 to 1859 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 area 1860 non-null object 1 blocks_id 1860 non-null object 2 perimeter 1860 non-null object 3 objectid 1860 non-null object 4 genplan 1860 non-null object 5 zoneclass 1860 non-null object 6 new_block 1802 non-null object 7 zone 1816 non-null object 8 geometry 1860 non-null geometry dtypes: geometry(1), object(8) memory usage: 130.9+ KB
Having blocks without new_block assignments is ok. We'll use blocks_id as our primary key for blocks, and maintain new_block feature for when we collate this data with other sets that may only reference areas with new_block ids.
blocks["zone"].unique()
array(['R', 'C', None, 'MM', 'MUR', 'MULI', 'M', 'U', 'X', 'SP', 'C-W'],
dtype=object)
nozones = blocks[blocks["zone"].isna()]
nozones.explore()
Same deal here with zones. It seems like areas with mixed usage have not been labeled correctly in certain areas. Again, for our use cases it's best to label them ourselves with the appropriate label according to the outlined standards.
blocks.loc[nozones["zoneclass"].index, "zone"] = "MUR"
blocks.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 1860 entries, 0 to 1859 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 area 1860 non-null object 1 blocks_id 1860 non-null object 2 perimeter 1860 non-null object 3 objectid 1860 non-null object 4 genplan 1860 non-null object 5 zoneclass 1860 non-null object 6 new_block 1802 non-null object 7 zone 1860 non-null object 8 geometry 1860 non-null geometry dtypes: geometry(1), object(8) memory usage: 130.9+ KB
Awesome! Our dataset is clean and interpretable. We can now integrate other datasets and do some analysis. We'll start by integrating information about service inquiries to the city via 311 calls.
inquiries = makeDataFrame("raw/311.csv")
inquiries
| Case_ID | Date_Opened | Case_Status | Date_Closed | Request_Category | Request_SubCategory | Request_Detail | Object_Type | APN | Street_Address | City | State | Neighborhood | Latitude | Longitude | Location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 121000877593 | 09/16/2021 06:23:23 AM | Closed | 09/20/2021 11:22:22 AM | Facilities, Electrical & Property Management | Parks/Marina Building Services | Keys / Locks | Property | NaN | Intersection of Browning and Addison, BERKELEY... | Berkeley | CA | Berkeley | NaN | NaN | NaN |
| 1 | 121000876647 | 09/13/2021 10:50:00 AM | Open | NaN | Refuse and Recycling | Residential | Residential Bulky Pickup | Property | 054 180702800 | 1722 DWIGHT WAY | Berkeley | CA | Berkeley | 37.862656 | -122.275461 | (37.86265624, -122.27546088) |
| 2 | 121000809740 | 11/06/2020 04:51:00 PM | Closed | 11/09/2020 01:52:57 AM | General Questions/information | Miscellaneous | Miscellaneous Service Request | Individual | NaN | NaN | Berkeley | CA | Berkeley | NaN | NaN | NaN |
| 3 | 121000809739 | 11/06/2020 04:38:00 PM | Closed | 11/09/2020 01:41:12 AM | General Questions/information | Miscellaneous | Miscellaneous Service Request | Property | 060 249305600 | 1411 GRIZZLY PEAK BLVD | Berkeley | CA | Berkeley | 37.884799 | -122.247874 | (37.88479918, -122.24787412) |
| 4 | 121000793663 | 09/01/2020 11:32:00 AM | Closed | 09/01/2020 11:36:00 AM | Other Account Services and Billing | Marina | Payment Collection - Marina | Individual | NaN | NaN | Berkeley | CA | Berkeley | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 654695 | 121000971475 | 12/04/2022 12:21:27 PM | Open | NaN | General Questions/information | Miscellaneous | Miscellaneous Internet Request | Property | 057 208701100 | 1029 HEARST AVE | Berkeley | CA | Berkeley | 37.870447 | -122.293627 | (37.87044703, -122.29362715) |
| 654696 | 121000971482 | 12/04/2022 04:16:35 PM | Open | NaN | General Questions/information | Miscellaneous | Miscellaneous Internet Request | Unknown | NaN | NaN | Berkeley | CA | Berkeley | NaN | NaN | NaN |
| 654697 | 121000971476 | 12/04/2022 12:53:20 PM | Open | NaN | General Questions/information | Miscellaneous | Miscellaneous Internet Request | Unknown | NaN | NaN | Berkeley | CA | Berkeley | NaN | NaN | NaN |
| 654698 | 121000971481 | 12/04/2022 04:04:51 PM | Open | NaN | Streets, Utilities, and Transportation | Clean City Program | Illegal Dumping - Internet Request | Property | 052 152201100 | 1615 SIXTY-THIRD ST | Berkeley | CA | Berkeley | 37.847632 | -122.275050 | (37.84763248, -122.27505004) |
| 654699 | 121000971478 | 12/04/2022 01:35:38 PM | Open | NaN | General Questions/information | Miscellaneous | Miscellaneous Internet Request | Unknown | NaN | NaN | Berkeley | CA | Berkeley | NaN | NaN | NaN |
654700 rows × 16 columns
inquiries.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 654700 entries, 0 to 654699 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Case_ID 654700 non-null int64 1 Date_Opened 654700 non-null object 2 Case_Status 654700 non-null object 3 Date_Closed 621774 non-null object 4 Request_Category 654700 non-null object 5 Request_SubCategory 654700 non-null object 6 Request_Detail 654700 non-null object 7 Object_Type 654700 non-null object 8 APN 375370 non-null object 9 Street_Address 413262 non-null object 10 City 654700 non-null object 11 State 654700 non-null object 12 Neighborhood 654700 non-null object 13 Latitude 370569 non-null float64 14 Longitude 370569 non-null float64 15 Location 370569 non-null object dtypes: float64(2), int64(1), object(13) memory usage: 79.9+ MB
inquiries["Neighborhood"].unique()
array(['Berkeley'], dtype=object)
Looks like only ~50% of the records have specific location data. Let's filter these out.
inquiries.drop(index=inquiries[inquiries["Latitude"].isna()].index, inplace=True)
# We only want unique cases
inquiries = inquiries.groupby('Case_ID', as_index=False).first()
inquiries.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 370562 entries, 0 to 370561 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Case_ID 370562 non-null int64 1 Date_Opened 370562 non-null object 2 Case_Status 370562 non-null object 3 Date_Closed 342846 non-null object 4 Request_Category 370562 non-null object 5 Request_SubCategory 370562 non-null object 6 Request_Detail 370562 non-null object 7 Object_Type 370562 non-null object 8 APN 370562 non-null object 9 Street_Address 370562 non-null object 10 City 370562 non-null object 11 State 370562 non-null object 12 Neighborhood 370562 non-null object 13 Latitude 370562 non-null float64 14 Longitude 370562 non-null float64 15 Location 370562 non-null object dtypes: float64(2), int64(1), object(13) memory usage: 45.2+ MB
We'll need a method to join a dataframe with our blocks data by latitude and longitude. We'll then add 2 new features to the dataset:
def makeGeoDataFrame(df):
"""
Given a pandas dataframe with columns=["Latitude", "Longitude"],
return a GeoDataFrame with point geometry
"""
gdf = geopandas.GeoDataFrame(
df, geometry=geopandas.points_from_xy(df["Longitude"], df["Latitude"]), crs=espg4326
)
return gdf
def joinOnCoordinate(polys, points):
"""
Given a GeoDataframe with polygons and
a GeoDataframe with point geometries, return
a GeoDataframe of the join.
"""
points.crs = polys.crs
joined = points.sjoin_nearest(polys, how="right")
return joined
inquiries = inquiries.loc[:, ["Request_Category", "APN", "Latitude", "Longitude"]]
counts = inquiries.groupby(["Latitude", "Longitude"]).count()["Request_Category"]
requestCategories = inquiries.groupby(["Latitude", "Longitude"]).agg(lambda x: ','.join(x.unique()))["Request_Category"]
inquiries = inquiries.groupby(["Latitude", "Longitude"]).first()
inquiries["serviceRequests"] = counts
inquiries["requestCategories"] = requestCategories
inquiries = inquiries.reset_index()
inquiries
| Latitude | Longitude | Request_Category | APN | serviceRequests | requestCategories | |
|---|---|---|---|---|---|---|
| 0 | 37.845970 | -122.275596 | Refuse and Recycling | 052 143600102 | 6 | Refuse and Recycling |
| 1 | 37.846022 | -122.275217 | Refuse and Recycling | 052 143501200 | 8 | Refuse and Recycling,General Questions/informa... |
| 2 | 37.846045 | -122.275786 | Refuse and Recycling | 052 143600200 | 20 | Refuse and Recycling,Streets, Utilities, and T... |
| 3 | 37.846141 | -122.275065 | Streets, Utilities, and Transportation | 052 143501100 | 5 | Streets, Utilities, and Transportation,Refuse ... |
| 4 | 37.846147 | -122.275633 | Refuse and Recycling | 052 143600101 | 14 | Refuse and Recycling,Streets, Utilities, and T... |
| ... | ... | ... | ... | ... | ... | ... |
| 24900 | 37.904624 | -122.272952 | Refuse and Recycling | 062 294603001 | 5 | Refuse and Recycling,Streets, Utilities, and T... |
| 24901 | 37.904645 | -122.273908 | Refuse and Recycling | 062 294900200 | 5 | Refuse and Recycling,General Questions/informa... |
| 24902 | 37.904723 | -122.273179 | Refuse and Recycling | 062 294603200 | 3 | Refuse and Recycling |
| 24903 | 37.904787 | -122.273322 | Streets, Utilities, and Transportation | 062 294603300 | 3 | Streets, Utilities, and Transportation,Refuse ... |
| 24904 | 37.904851 | -122.273458 | Refuse and Recycling | 062 294603400 | 11 | Refuse and Recycling,Parks, Trees and Vegetati... |
24905 rows × 6 columns
inquiries = makeGeoDataFrame(inquiries)
new_blocks = joinOnCoordinate(blocks, inquiries)
new_blocks
/Users/atharvashirke/pcs2022/doublespeak/.venv/lib/python3.10/site-packages/geopandas/array.py:344: UserWarning: Geometry is in a geographic CRS. Results from 'sjoin_nearest' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. warnings.warn(
| index_left | Latitude | Longitude | Request_Category | APN | serviceRequests | requestCategories | area | blocks_id | perimeter | objectid | genplan | zoneclass | new_block | zone | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24771 | 37.903241 | -122.274976 | Refuse and Recycling | 062 294902700 | 11 | Refuse and Recycling | 256240.562 | 3 | 2942.173 | 1 | LDR | R-1H | 062 2949 | R | MULTIPOLYGON (((-122.27356 37.90460, -122.2735... |
| 1 | 24888 | 37.904229 | -122.269848 | Refuse and Recycling | 063 295102304 | 9 | Refuse and Recycling,General Questions/informa... | 111175 | 5 | 1484.545 | 2 | LDR | R-1H | 063 2951 | R | MULTIPOLYGON (((-122.26997 37.90433, -122.2699... |
| 2 | 24749 | 37.903118 | -122.277698 | Refuse and Recycling | 062 294205500 | 10 | Refuse and Recycling,General Questions/informa... | 170184.812 | 6 | 2637.073 | 3 | LDR | R-1H | 062 2942 | R | MULTIPOLYGON (((-122.27484 37.90388, -122.2745... |
| 3 | 24461 | 37.901399 | -122.266383 | Traffic and Transportation | 063 310002500 | 5 | Traffic and Transportation,Refuse and Recyclin... | 307217.938 | 7 | 3510.71 | 4 | LDR | R-1H | 063 3100 | R | MULTIPOLYGON (((-122.26898 37.90397, -122.2684... |
| 4 | 24671 | 37.902659 | -122.272248 | Refuse and Recycling | 062 294503200 | 8 | Refuse and Recycling,General Questions/informa... | 101674.438 | 8 | 1421.052 | 5 | LDR | R-1H | 062 2945 | R | MULTIPOLYGON (((-122.27198 37.90388, -122.2719... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1855 | 17037 | 37.881194 | -122.297565 | Government Activity | 060 235500804 | 10 | Government Activity,Streets, Utilities, and Tr... | 157345.375 | 436 | 1725.794 | 1856 | M | MULI | 060 2355 | MULI | MULTIPOLYGON (((-122.29736 37.88114, -122.2974... |
| 1856 | 5029 | 37.858108 | -122.289271 | General Questions/information | 054 176300800 | 14 | General Questions/information,Refuse and Recyc... | 57831.625 | 1113 | 1126.78 | 1857 | M | MULI | 054 1763 | MULI | MULTIPOLYGON (((-122.28949 37.85835, -122.2893... |
| 1857 | 325 | 37.848776 | -122.269204 | Streets, Utilities, and Transportation | 052 152700103 | 2 | Streets, Utilities, and Transportation | 63278.188 | 1532 | 1248.658 | 1858 | NC | C-SA | 052 1527 | C | MULTIPOLYGON (((-122.26951 37.84853, -122.2695... |
| 1858 | 16 | 37.846375 | -122.273335 | Refuse and Recycling | 052 143500102 | 68 | Refuse and Recycling,Traffic and Transportatio... | 77694.688 | 1556 | 1456.261 | 1859 | NC | C-AC | 052 1435 | C | MULTIPOLYGON (((-122.27333 37.84618, -122.2733... |
| 1859 | 401 | 37.849198 | -122.273006 | Environmental Services and Programs | 052 153300400 | 21 | Environmental Services and Programs,Refuse and... | 28538.438 | 1525 | 718.455 | 1860 | MDR | R-2A | 052 1533 | R | MULTIPOLYGON (((-122.27278 37.84918, -122.2732... |
1860 rows × 16 columns
new_blocks = new_blocks.drop(columns=["Latitude", "Longitude", "index_left", "Request_Category"])
new_blocks = new_blocks.rename(columns={"requestCategories":"inquiryCategories", "serviceRequests":"inquiries"})
new_blocks.explore()
len(blocks) == len(new_blocks)
True
The range of service request values is really large. We'll add a log feature for better visualization.
new_blocks["ln_inquiries"] = np.log(new_blocks.inquiries)
new_blocks.explore(
column="ln_inquiries",
scheme="Quantiles",
tooltip="inquiries",
k = 20,
popup=True,
)
Chloropleth visualization of service requests in Berkeley by block.
Next we'll try to integrate some public safety data. We'll start with Berkeley City Auditor Police Response dataset. This rich dataset should provide us information regarding police responses from 2015-2019 fiscal years.
police = makeDataFrame("raw/pdResponse.csv")
police
/var/folders/mn/xk12q_t57y1889l8g92dyp8m0000gn/T/ipykernel_25295/845972379.py:9: DtypeWarning: Columns (9,13) have mixed types. Specify dtype option on import or set low_memory=False. return pd.read_csv(targetPath)
| CFSID | IncidentNumber | Address | Lat | Lon | Address Location Type | POLICE AREA | CallType | CallType Description | Priority | ... | Call Disposition | UNIT DISPOSITION | Occurred Incident Type | UCR Return A Code | PrimaryUnitFlag | UnitNumber | DispatchTime | EnRouteTime | OnSceneTime | ClearTime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -683358 | 2015-00000001 | 2200 BLOCK CHANNING WAY BERKELEY 94704 | 37.866528 | -122.263839 | Address | 6W | 1042 | CFS 1042 - Welfare Check | 2 | ... | MDT Narrative Only | NaN | NaN | NaN | 0 | S4 | 01/01/2015 12:14:52 AM | NaN | 2015-01-01T00:15:02.000 | 01/01/2015 12:19:38 AM |
| 1 | -683358 | 2015-00000001 | 2200 BLOCK CHANNING WAY BERKELEY 94704 | 37.866528 | -122.263839 | Address | 6W | 1042 | CFS 1042 - Welfare Check | 2 | ... | MDT Narrative Only | NaN | NaN | NaN | 1 | 3A6 | 01/01/2015 12:14:49 AM | NaN | 2015-01-01T00:15:04.000 | 01/01/2015 12:43:03 AM |
| 2 | -683359 | 2015-00000007 | 2000 BLOCK SHATTUCK AVE BERKELEY 94704 | 37.871559 | -122.268679 | Address | 4E | 415E | CFS 415E - Noise Disturbance | 4 | ... | MDT Narrative Only | NaN | NaN | NaN | 1 | S31 | 01/01/2015 12:43:54 AM | 2015-01-01T00:44:40.000 | 2015-01-01T00:45:51.000 | 01/01/2015 12:49:32 AM |
| 3 | -683364 | 2015-00000006 | 2400 BLOCK GRANT ST BERKELEY 94703 | 37.864138 | -122.274771 | Address | 5W | AID | CFS AID - Aid to Citizen | 4 | ... | MDT Narrative Only | NaN | NaN | NaN | 1 | S31 | 01/01/2015 12:42:13 AM | 2015-01-01T00:42:15.000 | NaN | 01/01/2015 12:42:54 AM |
| 4 | -683367 | 2015-00000014 | 2200 BLOCK GRANT ST BERKELEY 94703 | 37.867370 | -122.275101 | Address | 5W | 415E | CFS 415E - Noise Disturbance | 4 | ... | MDT Narrative Only | NaN | NaN | NaN | 1 | 4A5 | 01/01/2015 01:38:10 AM | 2015-01-01T01:38:12.000 | 2015-01-01T01:39:45.000 | 01/01/2015 01:40:53 AM |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 750107 | -1333694 | 2020-00045341 | SAN PABLO AVE BERKELEY 94702 | 37.873443 | -122.293531 | Intersection | 15S | T | CFS T - Traffic Stop | 0 | ... | RIPA | NaN | NaN | NaN | 0 | 4A13 | 09/30/2020 11:53:25 PM | 2020-09-30T23:54:25.000 | NaN | 09/30/2020 11:56:02 PM |
| 750108 | -1333694 | 2020-00045341 | SAN PABLO AVE BERKELEY 94702 | 37.873443 | -122.293531 | Intersection | 15S | T | CFS T - Traffic Stop | 0 | ... | RIPA | NaN | NaN | NaN | 1 | 4B9 | 09/30/2020 11:53:21 PM | 2020-09-30T23:53:21.000 | 2020-09-30T23:53:21.000 | 09/30/2020 11:56:14 PM |
| 750109 | -1333695 | 2020-00045342 | 8TH ST BERKELEY 94710 | 37.865704 | -122.294634 | Intersection | 14S | 1194 | CFS 1194 - Pedestrian Stop | 0 | ... | NaN | RIPA | NaN | NaN | 0 | 4A3 | 09/30/2020 11:59:23 PM | 2020-09-30T23:59:29.000 | 2020-09-30T23:59:29.000 | 10/01/2020 12:12:32 AM |
| 750110 | -1333695 | 2020-00045342 | 8TH ST BERKELEY 94710 | 37.865704 | -122.294634 | Intersection | 14S | 1194 | CFS 1194 - Pedestrian Stop | 0 | ... | NaN | RIPA | NaN | NaN | 0 | 4A13 | 09/30/2020 11:58:21 PM | 2020-09-30T23:58:25.000 | 2020-10-01T00:15:20.000 | 10/01/2020 12:31:54 AM |
| 750111 | -1333695 | 2020-00045342 | 8TH ST BERKELEY 94710 | 37.865704 | -122.294634 | Intersection | 14S | 1194 | CFS 1194 - Pedestrian Stop | 0 | ... | NaN | RIPA | NaN | NaN | 1 | 4B9 | 09/30/2020 11:58:14 PM | 2020-09-30T23:58:14.000 | 2020-09-30T23:58:14.000 | 10/01/2020 12:17:10 AM |
750112 rows × 22 columns
police = police.loc[:, ["Lat", "Lon", "CallType Description", "Occurred Incident Type"]]
police
| Lat | Lon | CallType Description | Occurred Incident Type | |
|---|---|---|---|---|
| 0 | 37.866528 | -122.263839 | CFS 1042 - Welfare Check | NaN |
| 1 | 37.866528 | -122.263839 | CFS 1042 - Welfare Check | NaN |
| 2 | 37.871559 | -122.268679 | CFS 415E - Noise Disturbance | NaN |
| 3 | 37.864138 | -122.274771 | CFS AID - Aid to Citizen | NaN |
| 4 | 37.867370 | -122.275101 | CFS 415E - Noise Disturbance | NaN |
| ... | ... | ... | ... | ... |
| 750107 | 37.873443 | -122.293531 | CFS T - Traffic Stop | NaN |
| 750108 | 37.873443 | -122.293531 | CFS T - Traffic Stop | NaN |
| 750109 | 37.865704 | -122.294634 | CFS 1194 - Pedestrian Stop | NaN |
| 750110 | 37.865704 | -122.294634 | CFS 1194 - Pedestrian Stop | NaN |
| 750111 | 37.865704 | -122.294634 | CFS 1194 - Pedestrian Stop | NaN |
750112 rows × 4 columns
police.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 750112 entries, 0 to 750111 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Lat 750112 non-null float64 1 Lon 750112 non-null float64 2 CallType Description 750061 non-null object 3 Occurred Incident Type 155651 non-null object dtypes: float64(2), object(2) memory usage: 22.9+ MB
Looks like a very small portion of this dataset has confirmed incidents. Let's filter out calls that don't have confirmed incidents. and just have a dataframe with coordinates and occured crimes.
police = police[~police["Occurred Incident Type"].isna()].loc[:, ["Lat", "Lon", "Occurred Incident Type"]]
police = police.rename(columns={"Lat":"Latitude", "Lon":"Longitude"})
counts = police.groupby(["Latitude", "Longitude"]).count()["Occurred Incident Type"]
incidentTypes = police.groupby(["Latitude", "Longitude"]).agg(lambda x: ','.join(x.unique()))["Occurred Incident Type"]
police = police.groupby(["Latitude", "Longitude"]).first()
police["confirmedCases"] = counts
police["incidentTypes"] = incidentTypes
police = police.drop(columns=["Occurred Incident Type"])
police = police.reset_index()
police
| Latitude | Longitude | confirmedCases | incidentTypes | |
|---|---|---|---|---|
| 0 | -361.000000 | -361.000000 | 1147 | Narcotics Violation,Fraud/Forgery,Vehicle Code... |
| 1 | 37.634523 | -122.112417 | 2 | Fraud/Forgery |
| 2 | 37.660245 | -122.091605 | 2 | Narcotics Violation |
| 3 | 37.668383 | -122.087013 | 21 | Narcotics Violation,Fraud/Forgery |
| 4 | 37.675897 | -121.779499 | 1 | Identity Theft |
| ... | ... | ... | ... | ... |
| 12655 | 37.904305 | -122.269621 | 8 | Vehicle Stolen,Burglary Commercial |
| 12656 | 37.904328 | -122.273225 | 10 | Burglary Residential |
| 12657 | 37.904358 | -122.270274 | 5 | Vandalism,Burglary Commercial |
| 12658 | 37.904620 | -122.272948 | 2 | Vandalism,Vehicle Stolen |
| 12659 | 37.904875 | -122.273458 | 11 | Theft from Auto,Vandalism |
12660 rows × 4 columns
Item 0 is clearly a null value they place confirmed cases without a location. We should drop this value.
police = police.drop(index=0)
police = makeGeoDataFrame(police)
newer_blocks = joinOnCoordinate(new_blocks, police)
/Users/atharvashirke/pcs2022/doublespeak/.venv/lib/python3.10/site-packages/geopandas/array.py:344: UserWarning: Geometry is in a geographic CRS. Results from 'sjoin_nearest' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. warnings.warn(
newer_blocks
| index_left | Latitude | Longitude | confirmedCases | incidentTypes | APN | inquiries | inquiryCategories | area | blocks_id | perimeter | objectid | genplan | zoneclass | new_block | zone | geometry | ln_inquiries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 12656 | 37.904328 | -122.273225 | 10 | Burglary Residential | 062 294902700 | 11 | Refuse and Recycling | 256240.562 | 3 | 2942.173 | 1 | LDR | R-1H | 062 2949 | R | MULTIPOLYGON (((-122.27356 37.90460, -122.2735... | 2.397895 |
| 1 | 12617 | 37.902783 | -122.269852 | 4 | Death Investigation | 063 295102304 | 9 | Refuse and Recycling,General Questions/informa... | 111175 | 5 | 1484.545 | 2 | LDR | R-1H | 063 2951 | R | MULTIPOLYGON (((-122.26997 37.90433, -122.2699... | 2.197225 |
| 2 | 12618 | 37.902790 | -122.275249 | 1 | Identity Theft | 062 294205500 | 10 | Refuse and Recycling,General Questions/informa... | 170184.812 | 6 | 2637.073 | 3 | LDR | R-1H | 062 2942 | R | MULTIPOLYGON (((-122.27484 37.90388, -122.2745... | 2.302585 |
| 3 | 12613 | 37.902589 | -122.266585 | 1 | Vehicle Stolen | 063 310002500 | 5 | Traffic and Transportation,Refuse and Recyclin... | 307217.938 | 7 | 3510.71 | 4 | LDR | R-1H | 063 3100 | R | MULTIPOLYGON (((-122.26898 37.90397, -122.2684... | 1.609438 |
| 4 | 12620 | 37.902830 | -122.272560 | 1 | Vehicle Stolen | 062 294503200 | 8 | Refuse and Recycling,General Questions/informa... | 101674.438 | 8 | 1421.052 | 5 | LDR | R-1H | 062 2945 | R | MULTIPOLYGON (((-122.27198 37.90388, -122.2719... | 2.079442 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1855 | 10490 | 37.881783 | -122.297754 | 36 | Vandalism,Burglary Commercial,Robbery | 060 235500804 | 10 | Government Activity,Streets, Utilities, and Tr... | 157345.375 | 436 | 1725.794 | 1856 | M | MULI | 060 2355 | MULI | MULTIPOLYGON (((-122.29736 37.88114, -122.2974... | 2.302585 |
| 1856 | 3117 | 37.858415 | -122.289523 | 5 | Vandalism | 054 176300800 | 14 | General Questions/information,Refuse and Recyc... | 57831.625 | 1113 | 1126.78 | 1857 | M | MULI | 054 1763 | MULI | MULTIPOLYGON (((-122.28949 37.85835, -122.2893... | 2.639057 |
| 1857 | 374 | 37.848766 | -122.269190 | 10 | Narcotics Violation | 052 152700103 | 2 | Streets, Utilities, and Transportation | 63278.188 | 1532 | 1248.658 | 1858 | NC | C-SA | 052 1527 | C | MULTIPOLYGON (((-122.26951 37.84853, -122.2695... | 0.693147 |
| 1858 | 131 | 37.846396 | -122.273312 | 20 | Disturbance,Theft Misdemeanor (Under $950),Dom... | 052 143500102 | 68 | Refuse and Recycling,Traffic and Transportatio... | 77694.688 | 1556 | 1456.261 | 1859 | NC | C-AC | 052 1435 | C | MULTIPOLYGON (((-122.27333 37.84618, -122.2733... | 4.219508 |
| 1859 | 445 | 37.849303 | -122.273144 | 2 | Assault/Battery Misdemeanor | 052 153300400 | 21 | Environmental Services and Programs,Refuse and... | 28538.438 | 1525 | 718.455 | 1860 | MDR | R-2A | 052 1533 | R | MULTIPOLYGON (((-122.27278 37.84918, -122.2732... | 3.044522 |
1860 rows × 18 columns
newer_blocks.explore(
column="confirmedCases",
scheme="Quantiles",
tooltip="confirmedCases",
k = 20,
popup=True,
)
/Users/atharvashirke/pcs2022/doublespeak/.venv/lib/python3.10/site-packages/mapclassify/classifiers.py:238: UserWarning: Warning: Not enough unique values in array to form k classes
Warn(
/Users/atharvashirke/pcs2022/doublespeak/.venv/lib/python3.10/site-packages/mapclassify/classifiers.py:241: UserWarning: Warning: setting k to 11
Warn("Warning: setting k to %d" % k_q, UserWarning)
Chloropleth map of police response activity to confirmed cases.